# 导入模块
import pandas as pd
# 导入gensim的模型和语料库
from gensim import models, corpora

def _load_reviews(path):
    """Load a file of pre-segmented reviews into a two-column DataFrame.

    Each non-blank line of *path* is treated as one review whose tokens are
    separated by spaces.  In the returned frame, column 0 holds the raw line
    and column 1 holds the list of its tokens.

    The file is read as plain text instead of through ``pd.read_csv``: the
    CSV parser splits a line at every comma (so column 0 keeps only the text
    before the first comma, or parsing fails when rows have differing field
    counts) and turns lines such as ``NA`` or ``null`` into NaN, on which
    ``.split`` raises ``AttributeError``.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark.
    with open(path, encoding='utf-8-sig') as handle:
        lines = [line.rstrip('\n') for line in handle]
    # Blank lines carry no tokens, so they are not kept as reviews.
    lines = [line for line in lines if line.strip()]
    frame = pd.DataFrame({0: lines})
    # split(' ') yields '' for repeated/leading/trailing spaces; drop those
    # so the empty string never becomes a dictionary entry.
    frame[1] = frame[0].apply(lambda s: [tok for tok in s.split(' ') if tok])
    return frame


# Positive and negative reviews; column 1 of each frame is the token list.
pos = _load_reviews('pos.txt')
neg = _load_reviews('neg.txt')


# Build an LDA model for each sentiment class and extract its topic keywords

# Topic analysis of the positive reviews
# Build the token -> integer id dictionary from the tokenised reviews.
pos_dict = corpora.Dictionary(pos[1])
# Turn every review into its bag-of-words representation.
pos_corpus = list(map(pos_dict.doc2bow, pos[1]))
# Train a three-topic LDA model on the bag-of-words corpus.
pos_lda = models.LdaModel(corpus=pos_corpus, num_topics=3, id2word=pos_dict)
print("\n正面评价")
# Print every topic of the trained model, one header line per topic.
for topic_id in range(pos_lda.num_topics):
    print("主题%d : " % topic_id)
    print(pos_lda.print_topic(topic_id))


# Topic analysis of the negative reviews
neg_tokens = neg[1]
# Build the token -> integer id dictionary from the tokenised reviews.
neg_dict = corpora.Dictionary(neg_tokens)
# Turn every review into its bag-of-words representation.
neg_corpus = [neg_dict.doc2bow(tokens) for tokens in neg_tokens]
# Train a three-topic LDA model on the bag-of-words corpus.
neg_lda = models.LdaModel(corpus=neg_corpus, id2word=neg_dict, num_topics=3)
print("\n负面评价")
# Print every topic of the trained model, one header line per topic.
for topic_id in range(neg_lda.num_topics):
    print("主题%d : " % topic_id)
    print(neg_lda.print_topic(topic_id))



